package com.cyc.tool.owltools;
/*
* #%L
* OwlTools
* %%
* Copyright (C) 2015 Cycorp, Inc
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
//import com.cyc.tool.distributedrepresentations.GoogleNewsW2VSpace;
//import com.cyc.tool.distributedrepresentations.Word2VecSpace;
import com.google.common.collect.Iterables;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentNavigableMap;
import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.io.FileDocumentSource;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLAnnotationProperty;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLDataFactory;
import org.semanticweb.owlapi.model.OWLLogicalEntity;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.semanticweb.owlapi.reasoner.Node;
import org.semanticweb.owlapi.reasoner.NodeSet;
import org.semanticweb.owlapi.reasoner.OWLReasoner;
import org.semanticweb.owlapi.reasoner.OWLReasonerFactory;
import org.semanticweb.owlapi.reasoner.structural.StructuralReasonerFactory;
import org.semanticweb.owlapi.search.EntitySearcher;
import org.semanticweb.owlapi.vocab.OWLRDFVocabulary;
/**
 * OpenCycOwl has methods for accessing information in an OpenCyc OWL file.
 * <p>
 * There is some known overlap between this class, {@link OpenCycReasoner},
 * and {@link OpenCycContent}.
 */
public class OpenCycOwl {
/** Path of the OWL document that {@link #getOpenCyc()} loads, as configured in {@link OwlToolsConfig}. */
static final String ocycLocation = OwlToolsConfig.ocycLocation;
/**
 * HLID for testing purposes; {@code pizzaTest()} looks up the class with this identifier.
 */
public String pizzaGUID = "Mx4rvVibapwpEbGdrcN5Y29ycA";
/** When true, the persisted term-to-concept maps are cleared before being (re)built. */
private final boolean clearLabels = false;
/** Data factory obtained from {@link #manager}; used to create classes and annotation properties. */
private final OWLDataFactory dataFactory;
/** Ontology manager that owns the loaded OpenCyc ontology. */
private final OWLOntologyManager manager;
/** Lazily loaded ontology; stays null until {@link #getOpenCyc()} succeeds. */
private OWLOntology openCyc;
/** Annotation property used to look up "pretty string" names of concepts. */
private final OWLAnnotationProperty prettyString;
/** The rdfs:label annotation property. */
private final OWLAnnotationProperty rdfsLabel;
/** Lazily created reasoner over the OpenCyc ontology; see {@link #getReasoner()}. */
private OWLReasoner reasoner;
/** Factory used by {@link #getReasoner()}; a structural reasoner factory is installed by the constructor. */
private final OWLReasonerFactory reasonerFactory;
private long t; // time keeper
/** DB-backed set of concept identifiers; (re)assigned on every call to {@link #allConcepts()}. */
Set<String> allConcepts;
/** In-memory map from a concept identifier to the term strings that refer to it. */
final Map<String, Set<String>> conceptLabels;
/** Only referenced from commented-out code in this file. */
Set<String> conceptsWithTerms;
/** MapDB database holding the persisted maps and sets below. */
DB db;
/** rdfs:label text -> identifiers of the concepts carrying that label. */
ConcurrentNavigableMap<String, Set<String>> ocycConceptForTermLabel;
/** Lower-cased (English locale) keys of the two maps around it -> concept identifiers. */
ConcurrentNavigableMap<String, Set<String>> ocycConceptForTermLower;
/** Pretty-string text -> identifiers of the concepts carrying that pretty string. */
ConcurrentNavigableMap<String, Set<String>> ocycConceptForTermPrettyString;
/** Concept GUID -> short forms of its direct superclasses, as computed by the reasoner. */
ConcurrentNavigableMap<String, Set<String>> typeGraph;
/**
* Creates a new instance of OwlTest.
* @throws java.io.IOException
* @throws org.semanticweb.owlapi.model.OWLOntologyCreationException
*/
public OpenCycOwl() throws IOException, OWLOntologyCreationException {
// A simple example of how to load and save an ontology We first need to
// obtain a copy of an OWLOntologyManager, which, as the name suggests,
// manages a set of ontologies. An ontology is unique within an ontology
// manager. Each ontology knows its ontology manager. To load multiple
// copies of an ontology, multiple managers would have to be used.
manager = OWLManager.createOWLOntologyManager();
// We load an ontology from a document IRI - in this case we'll load the
// pizza ontology.
// IRI documentIRI = IRI.create(PIZZA_IRI);
// Now ask the manager to load the ontology
// OWLOntology ontology = manager
// .loadOntologyFromOntologyDocument(documentIRI);
// but in this test we don't rely on a remote ontology and load it from
// a string
//play with mapr
// System.out.println(Arrays.asList(1,2,3,4,5,6,7,8).stream().map(x->x*x).reduce((x,y)->x+y).get());
db = DBMaker.newFileDB(new File(OwlToolsConfig.getOcycTermDBFile()))
.closeOnJvmShutdown()
// .encryptionEnable("password")
.make();
reasonerFactory = new StructuralReasonerFactory();
dataFactory = manager.getOWLDataFactory();
prettyString = dataFactory.getOWLAnnotationProperty(
guidToIRI("Mx4rwLSVCpwpEbGdrcN5Y29ycA"));
rdfsLabel = dataFactory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_LABEL.getIRI());
this.getPrettyStringToConceptMap();
this.getRDFSLabelConceptMap();
this.getLowerCaseConceptMap();
this.createTypeGraph();
conceptLabels = new HashMap<>();
this.fillConceptLabels();
}
/**
 * Command-line smoke test: loads the ontology, prints its class count, runs
 * the pizza example and finally removes the ontology from its manager.
 *
 * @param args not used
 * @throws Exception if anything goes wrong while building the OpenCycOwl
 */
public static void main(String[] args) throws Exception {
  final OpenCycOwl owl = new OpenCycOwl();
  final int classCount = owl.getOpenCyc().getClassesInSignature().size();
  System.out.println("N Classes:" + classCount);
  owl.pizzaTest();
  // Detach the ontology from its manager now that we are done with it.
  final OWLOntologyManager owner = owl.manager;
  owner.removeOntology(owl.getOpenCyc());
}
/**
 * Returns the DB-backed set of concept GUIDs, populating it from the classes
 * in the ontology signature the first time it is found empty.
 *
 * @return the allConcepts Set
 * @throws IOException declared for callers; not thrown by the code visible here
 */
public Set<String> allConcepts() throws IOException {
  allConcepts = db.getHashSet(OwlToolsConfig.getAllConceptsName());
  if (!allConcepts.isEmpty()) {
    return allConcepts;
  }
  // First use: derive one GUID per class and persist the result.
  final Set<String> guids = new HashSet<>();
  for (OWLClass owlClass : getOpenCyc().getClassesInSignature()) {
    final String classId = owlClass.toStringID();
    final String guid = guidFromURLString(classId);
    System.out.println("AC:" + classId + " " + guid);
    guids.add(guid);
  }
  allConcepts.addAll(guids);
  db.commit();
  return allConcepts;
}
/**
 * Closes ontology access by removing the loaded ontology from its manager.
 * Does nothing when the ontology was never loaded.
 */
public void close() {
  if (openCyc == null) {
    return;
  }
  manager.removeOntology(openCyc);
}
/**
 * Looks a term up in the pretty-string map, the rdfs:label map and (after
 * lower-casing with the English locale) the lower-case map. A term that
 * contains underscores is additionally looked up with each underscore
 * replaced by a space.
 *
 * @param term the term string to look up
 * @return all concepts for a given term String; empty when nothing matches
 */
public Set<String> conceptsFor(String term) {
  final Set<String> matches = new HashSet<>();
  collectConcepts(ocycConceptForTermPrettyString, term, matches);
  collectConcepts(ocycConceptForTermLabel, term, matches);
  collectConcepts(ocycConceptForTermLower, term.toLowerCase(Locale.ENGLISH), matches);
  if (term.contains("_")) {
    matches.addAll(conceptsFor(term.replace("_", " ")));
  }
  return matches;
}

/** Adds the concepts stored under {@code key} in {@code source}, if any, to {@code into}. */
private static void collectConcepts(Map<String, Set<String>> source, String key, Set<String> into) {
  if (source.containsKey(key)) {
    into.addAll(source.get(key));
  }
}
/**
*
* @return Set of concepts with terms in the W2V space
* @throws IOException
*/
// public Set<String> conceptsWithW2VTerms() throws IOException {
// /* @Todo: Consider making this more independent of the particular W2V space */
// Word2VecSpace w2v = GoogleNewsW2VSpace.get();
// conceptsWithTerms = db.getHashSet(OwlToolsConfig.getConceptsWithTermsName());
// if (conceptsWithTerms.isEmpty()) {
// Set<String> res
// = Stream.concat(
// Stream.concat(
// ocycConceptForTermPrettyString.entrySet().stream(),
// ocycConceptForTermLabel.entrySet().stream()),
// ocycConceptForTermLower.entrySet().stream())
// .filter(s -> w2v.knownTerm(s.getKey()))
// .map(s -> s.getValue())
// .flatMap(conceptSet -> conceptSet.stream())
// .collect(Collectors.toSet());
// conceptsWithTerms.addAll(res);
// db.commit();
// }
// return conceptsWithTerms;
// }
/**
 * Returns the direct types recorded for a term in the type graph.
 *
 * @param forT the term (a key of the type graph)
 * @return the Set stored in the type graph for the term, or a new empty Set
 *     when the term has no entry
 */
public Set<String> getTypes(String forT) {
  if (!typeGraph.containsKey(forT)) {
    // Nothing recorded for this term.
    return new HashSet<>();
  }
  return typeGraph.get(forT);
}
/**
 * Asks the reasoner for the direct superclasses of a concept and returns the
 * short form of each superclass IRI.
 *
 * @param conceptGUID GUID of the concept whose types are wanted
 * @return Set of types for a concept
 * @throws OWLOntologyCreationException declared for callers; not thrown by
 *     the code visible here
 */
public Set<String> getTypesForConceptFromOWL(String conceptGUID) throws OWLOntologyCreationException {
  final IRI conceptIri = guidToIRI(conceptGUID);
  final OWLClass owlClass = dataFactory.getOWLClass(conceptIri);
  // 'true' restricts the answer to direct superclasses.
  final NodeSet<OWLClass> directSupers = getReasoner().getSuperClasses(owlClass, true);
  final Set<String> shortForms = new HashSet<>();
  for (Node<OWLClass> node : directSupers) {
    for (OWLClass superClass : node.getEntities()) {
      shortForms.add(superClass.getIRI().getShortForm());
    }
  }
  return shortForms;
}
/**
 * Collects every type reachable from a term by repeatedly following the type
 * graph, starting from the term's direct types.
 *
 * @param forT the term (a key of the type graph)
 * @return Set of types for a term; empty when the term has no entry
 */
public Set<String> getTypesTransitive(String forT) {
  final Set<String> closure = new HashSet<>();
  if (typeGraph.containsKey(forT)) {
    for (String directType : typeGraph.get(forT)) {
      getTypesTransitive(directType, closure);
    }
  }
  return closure;
}
/**
 * URL flavour of {@link #getTypesTransitive(String)}: accepts a concept URL
 * and returns the transitive types as concept URLs.
 *
 * @param forT concept URL of the term
 * @return Set of types for a term, each expressed as a URL
 */
public Set<String> getTypesTransitiveURL(String forT) {
  final String guid = guidFromURLString(forT);
  final Set<String> urls = new HashSet<>();
  for (String type : getTypesTransitive(guid)) {
    urls.add(guidToURLString(type));
  }
  return urls;
}
/**
 * URL flavour of {@link #getTypes(String)}: accepts a concept URL and
 * returns the direct types as concept URLs.
 *
 * @param forT concept URL of the term
 * @return Set of types of a term, each expressed as a URL
 */
public Set<String> getTypesURL(String forT) {
  final Set<String> directTypes = getTypes(guidFromURLString(forT));
  return directTypes
      .stream()
      .map(this::guidToURLString)
      .collect(Collectors.toSet());
}
/**
 * Removes the first occurrence of the OpenCyc concept URL prefix
 * ({@code http://sw.opencyc.org/concept/}) from a string.
 * <p>
 * The prefix is matched literally. A string that does not contain the prefix
 * is returned unchanged, so passing a bare GUID is harmless.
 *
 * @param url a concept URL (or a bare GUID)
 * @return GUID from a URL
 */
public String guidFromURLString(String url) {
  final String prefix = "http://sw.opencyc.org/concept/";
  // Literal search: a regex-based replace would let each '.' match any character.
  final int at = url.indexOf(prefix);
  if (at < 0) {
    return url;
  }
  return url.substring(0, at) + url.substring(at + prefix.length());
}
/**
 * Builds the OpenCyc concept URL for a GUID by prepending the concept URL
 * prefix.
 *
 * @param conceptGuid the GUID of a concept
 * @return URL from a GUID
 */
public String guidToURLString(String conceptGuid) {
  final StringBuilder url = new StringBuilder("http://sw.opencyc.org/concept/");
  url.append(conceptGuid);
  return url.toString();
}
/**
 * Tests whether a term is a key of the pretty-string map, of the rdfs:label
 * map or (after lower-casing with the English locale) of the lower-case map.
 * A term containing underscores that is not found directly is retried with
 * each underscore replaced by a space.
 *
 * @param term the term string to test
 * @return true if term is in the ontology
 */
public boolean knownTerm(String term) {
  final boolean foundDirectly
      = ocycConceptForTermPrettyString.containsKey(term)
      || ocycConceptForTermLabel.containsKey(term)
      || ocycConceptForTermLower.containsKey(term.toLowerCase(Locale.ENGLISH));
  if (foundDirectly) {
    return true;
  }
  return term.contains("_") && knownTerm(term.replace("_", " "));
}
/**
 * Joins all labels known for a concept with {@code |}. A concept without any
 * recorded label is returned as-is.
 *
 * @param concept the concept identifier, as used for keys of the label map
 * @return a String with labels for the concept
 */
public String labelsForConcept(String concept) {
  return conceptLabels.containsKey(concept)
      ? String.join("|", conceptLabels.get(concept))
      : concept;
}
/**
 * Builds a predicate over arrays of terms.
 *
 * @return a Predicate that is true when none of the terms in the array is a
 *     known term (see {@link #knownTerm(String)})
 */
public Predicate<String[]> noConcept() {
  return terms -> Arrays.stream(terms).noneMatch(hasConcept());
}
/**
 * Counts the classes in the signature of the OpenCyc ontology.
 *
 * @return Number of classes in the ontology
 */
public int size() {
  final OWLOntology ontology = getOpenCyc();
  return ontology.getClassesInSignature().size();
}
/**
 * Returns the OpenCyc ontology, loading it from {@code ocycLocation} on
 * first use and printing the load time.
 * <p>
 * A load failure is logged at SEVERE level and {@code null} is returned; the
 * next call tries to load again.
 *
 * @return an OWLOntology for OpenCyc, or null when loading failed
 */
protected OWLOntology getOpenCyc() {
  if (openCyc == null) {
    // Timed with a local rather than the shared field 't': callers such as
    // pizzaTest() can trigger this lazy load while 't' holds their own start time.
    final long start = System.currentTimeMillis();
    try {
      openCyc = manager.loadOntologyFromOntologyDocument(
          new FileDocumentSource(new File(ocycLocation)));
      System.out.println("Open Cyc Load time:"
          + (System.currentTimeMillis() - start) + "ms");
    } catch (OWLOntologyCreationException ex) {
      Logger.getLogger(OpenCycOwl.class.getName()).log(Level.SEVERE,
          "Unable to load the OpenCyc ontology from " + ocycLocation, ex);
    }
  }
  return openCyc;
}
/**
 * Returns the reasoner over the OpenCyc ontology, creating it with the
 * configured reasoner factory on first use.
 *
 * @return an OWLReasoner
 */
protected OWLReasoner getReasoner() {
  if (reasoner != null) {
    return reasoner;
  }
  reasoner = reasonerFactory.createReasoner(getOpenCyc());
  return reasoner;
}
/**
 * Opens the persisted type graph and, when it is empty, fills it with the
 * direct types of every concept before committing and compacting the DB.
 *
 * @throws IOException propagated from {@link #allConcepts()}
 */
private void createTypeGraph() throws IOException {
  typeGraph = db.getTreeMap(OwlToolsConfig.getTypeGraphName());
  if (!typeGraph.isEmpty()) {
    return;
  }
  for (String concept : allConcepts()) {
    final String guid = guidFromURLString(concept);
    try {
      final Set<String> directTypes = getTypesForConceptFromOWL(guid);
      System.out.println("Types for " + guid + ": " + directTypes.size());
      typeGraph.put(guid, directTypes);
    } catch (OWLOntologyCreationException ex) {
      // Log and carry on with the remaining concepts.
      Logger.getLogger(OpenCycOwl.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
  db.commit();
  db.compact();
}
/**
 * Inverts the term-to-concept maps into {@code conceptLabels}, so that every
 * concept maps to the set of term strings that refer to it, and prints how
 * long that took.
 * <p>
 * The rdfs:label map and the pretty-string map are each read once.
 * NOTE(review): the label map used to be listed twice here; if the
 * lower-case map was meant instead of the duplicate, add
 * {@code ocycConceptForTermLower.entrySet()} to the concatenation - confirm
 * against the intended output of {@code labelsForConcept}.
 */
private void fillConceptLabels() {
  final long start = System.currentTimeMillis();
  for (Map.Entry<String, Set<String>> entry : Iterables.concat(
      ocycConceptForTermLabel.entrySet(),
      ocycConceptForTermPrettyString.entrySet())) {
    final String term = entry.getKey();
    for (String concept : entry.getValue()) {
      conceptLabels.computeIfAbsent(concept, unused -> new HashSet<>()).add(term);
    }
  }
  System.out.println("Concept to term map creation:"
      + (System.currentTimeMillis() - start) + "ms");
}
/**
 * Opens the persisted lower-case term map and, when it is empty, derives it
 * from the keys of the pretty-string map and of the rdfs:label map, then
 * commits and compacts the DB. Both source maps must already be open.
 */
private void getLowerCaseConceptMap() {
  ocycConceptForTermLower = db.getTreeMap(OwlToolsConfig.getOcycTermMapName() + "_Lower");
  if (clearLabels) {
    ocycConceptForTermLower.clear();
  }
  if (!ocycConceptForTermLower.isEmpty()) {
    return;
  }
  for (String prettyTerm : ocycConceptForTermPrettyString.keySet()) {
    storeDownCaseLabel(prettyTerm, ocycConceptForTermPrettyString);
  }
  for (String labelTerm : ocycConceptForTermLabel.keySet()) {
    storeDownCaseLabel(labelTerm, ocycConceptForTermLabel);
  }
  db.commit();
  db.compact();
}
/**
 * Opens the persisted pretty-string map and, when it is empty, fills it from
 * the pretty-string annotations of every class and individual in the
 * ontology signature, then commits and compacts the DB.
 */
private void getPrettyStringToConceptMap() {
  ocycConceptForTermPrettyString = db.getTreeMap(OwlToolsConfig.getOcycTermMapName());
  if (clearLabels) {
    ocycConceptForTermPrettyString.clear();
  }
  if (!ocycConceptForTermPrettyString.isEmpty()) {
    return;
  }
  Iterables.concat(
      getOpenCyc().getClassesInSignature(),
      getOpenCyc().getIndividualsInSignature()).forEach(entity -> {
    final String kind = entity instanceof OWLClass ? "Class" : "Individual";
    System.out.println("Loading PrettyStrings for " + kind + ": " + entity);
    for (OWLAnnotation annotation
        : EntitySearcher.getAnnotations(entity, getOpenCyc(), prettyString)) {
      storeConceptLabel(annotation, entity, ocycConceptForTermPrettyString);
    }
  });
  db.commit();
  db.compact();
}
/**
 * Opens the persisted rdfs:label map and, when it is empty, fills it from
 * the rdfs:label annotations of every class and individual in the ontology
 * signature, then commits and compacts the DB.
 */
private void getRDFSLabelConceptMap() {
  ocycConceptForTermLabel = db.getTreeMap(OwlToolsConfig.getOcycTermMapName() + "_Label");
  if (clearLabels) {
    ocycConceptForTermLabel.clear();
  }
  if (!ocycConceptForTermLabel.isEmpty()) {
    return;
  }
  // Harvest the terms for collections (classes) and individuals alike.
  Iterables.concat(
      getOpenCyc().getClassesInSignature(),
      getOpenCyc().getIndividualsInSignature()).forEach(entity -> {
    final String kind = entity instanceof OWLClass ? "Class" : "Individual";
    System.out.println("Loading RDFS Labels for " + kind + ": " + entity);
    for (OWLAnnotation annotation
        : EntitySearcher.getAnnotations(entity, getOpenCyc(), rdfsLabel)) {
      storeConceptLabel(annotation, entity, ocycConceptForTermLabel);
    }
  });
  db.commit();
  db.compact();
}
/**
 * Recursive worker for {@link #getTypesTransitive(String)}: records
 * {@code forT} in {@code soFar} and then visits its direct types. Terms
 * already present in {@code soFar} are skipped, and the walk does not
 * continue past "Thing".
 */
private void getTypesTransitive(String forT, Set<String> soFar) {
  if (soFar.contains(forT)) {
    return; // already visited
  }
  soFar.add(forT);
  if (forT.equals("Thing")) {
    return;
  }
  for (String parent : getTypes(forT)) {
    getTypesTransitive(parent, soFar);
  }
}
/** Converts a concept GUID into the IRI of its OpenCyc concept URL. */
private IRI guidToIRI(String conceptGuid) {
  final String url = guidToURLString(conceptGuid);
  return IRI.create(url);
}
/** Returns a predicate that delegates to {@link #knownTerm(String)}. */
private Predicate<String> hasConcept() {
  return this::knownTerm;
}
/**
 * Manual check against the concept named by {@code pizzaGUID}: prints how
 * long the pretty-string lookup took, the concept's pretty strings, and then
 * each direct subclass together with its pretty strings.
 */
private void pizzaTest() {
  final OWLClass pizza = dataFactory.getOWLClass(guidToIRI(pizzaGUID));
  // 'true' restricts the answer to direct subclasses.
  final NodeSet<OWLClass> directSubClasses = getReasoner().getSubClasses(pizza, true);

  t = System.currentTimeMillis();
  final Collection<OWLAnnotation> pizzaNames
      = EntitySearcher.getAnnotations(pizza, getOpenCyc(), prettyString);
  System.out.println("Search time:" + (System.currentTimeMillis() - t) + "ms");
  for (OWLAnnotation pizzaName : pizzaNames) {
    System.out.println(pizzaName.getValue().asLiteral().get().getLiteral());
  }

  for (Node<OWLClass> node : directSubClasses) {
    for (OWLClass subClass : node.getEntities()) {
      System.out.println("SubType:" + subClass);
      for (OWLAnnotation subName
          : EntitySearcher.getAnnotations(subClass, getOpenCyc(), prettyString)) {
        final String literal = subName.getValue().asLiteral().get().getLiteral();
        System.out.println("\t:" + literal);
      }
    }
  }
}
/**
 * Records, in {@code labelMap}, that the literal text of an annotation
 * refers to the given entity. A literal that starts with "the " is also
 * recorded without that leading article, to extend the reach of lookups.
 * <p>
 * Annotations whose value is not a literal are skipped.
 *
 * @param ann the annotation supplying the term text
 * @param owlObj the entity the annotation belongs to; its string ID is stored
 * @param labelMap the term-to-concepts map to update
 */
private void storeConceptLabel(OWLAnnotation ann, OWLLogicalEntity owlObj, ConcurrentNavigableMap<String, Set<String>> labelMap) {
  if (!ann.getValue().asLiteral().isPresent()) {
    return; // no literal text to index
  }
  final String lit = ann.getValue().asLiteral().get().getLiteral();
  final String conceptId = owlObj.toStringID();
  addConceptUnderTerm(labelMap, lit, conceptId);

  final String article = "the ";
  if (lit.startsWith(article)) {
    // Strip only the leading article; "the " may also occur later in the text.
    addConceptUnderTerm(labelMap, lit.substring(article.length()), conceptId);
  }
}

/**
 * Adds {@code conceptId} to the set stored under {@code term}. A fresh set is
 * written back with {@code put} instead of mutating the stored one
 * (presumably required because the map is DB-backed - confirm).
 */
private static void addConceptUnderTerm(ConcurrentNavigableMap<String, Set<String>> labelMap, String term, String conceptId) {
  final Set<String> updated = new HashSet<>();
  if (labelMap.containsKey(term)) {
    updated.addAll(labelMap.get(term));
  }
  updated.add(conceptId);
  labelMap.put(term, updated);
}
/**
 * Merges the concepts stored under {@code s} in {@code labelMap} into the
 * lower-case map, under the English-locale lower-cased form of {@code s}.
 *
 * @param s a key of {@code labelMap}
 * @param labelMap the map the concepts for {@code s} are read from
 */
private void storeDownCaseLabel(String s, ConcurrentNavigableMap<String, Set<String>> labelMap) {
  final String lowered = s.toLowerCase(Locale.ENGLISH);
  final Set<String> merged = new HashSet<>();
  final boolean alreadyPresent = ocycConceptForTermLower.containsKey(lowered);
  if (alreadyPresent) {
    // Keep whatever another spelling of the same lower-case term contributed.
    merged.addAll(ocycConceptForTermLower.get(lowered));
  }
  merged.addAll(labelMap.get(s));
  ocycConceptForTermLower.put(lowered, merged);
}
}